knitr::opts_chunk$set(echo = TRUE, 
                      warning = FALSE, 
                      message = FALSE, 
                      fig.align = 'center')
knitr::opts_knit$set(root.dir = "/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex", 
                     base.dir = "/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex")
library(data.table)
library(ggplot2)
library(parallel)
library(cowplot)
library(patchwork)
library(GenomicAlignments)
ROC_Tab_1 <- readRDS("./analysis/07.VirusClassification/03.Merge/02.Prediction/Prediction_Table_20210811.Rds")
ROC_Tab_2 <- readRDS("./analysis/07.VirusClassification/03.Merge/02.Prediction/Prediction_Table_20210825.Rds")
ROC_Tab_3 <- readRDS("./analysis/07.VirusClassification/03.Merge/02.Prediction/Prediction_Table_20211008.Rds")

ROC_Tab_1[, read := gsub("read_", "", read)]
ROC_Tab_2[, read := gsub("read_", "", read)]
ROC_Tab_3[, read := gsub("read_", "", read)]

2021-08-11

ROC_Tab_1[PredSpe == Species, .N, Species]

Alignment

bams <- list.files("/mnt/raid61/Personal_data/songjunwei/DRS_RTA/align/batch1", "bam", full.names = TRUE)
SVV_bam <- GenomicAlignments::readGAlignments(file = "/mnt/raid61/Personal_data/songjunwei/DRS_RTA/align/batch1/align_SVV.bam", 
                                              param = Rsamtools::ScanBamParam(what = c("qname", "flag", "mapq", "seq")), use.names = TRUE)

SVV_bam <- SVV_bam[names(SVV_bam) %in% ROC_Tab_1[PredSpe == Species & PredSpe == "SVV", read]]
svv_fa <- readDNAStringSet("/mnt/raid61/Personal_data/songjunwei/DRS_RTA/ref/SVV.fasta")
SVV_depth <- data.table(POS = seq_len(width(svv_fa)), Depth = as.numeric(coverage(SVV_bam)[[1]]))
library(microseq)
library(Biostrings)
fastq <- microseq::readFastq("/mnt/raid61/Personal_data/songjunwei/DRS_RTA/batch1/guppy/DRS_hac.fastq")
id <- fastq$Header
fastq <- QualityScaledDNAStringSet(x = RNAStringSet(fastq$Sequence), 
                                   quality = PhredQuality(fastq$Quality))
names(fastq) <- id
SVV_fastq <- fastq[mapply(function(x) x[1], strsplit(names(fastq), " ")) %in% ROC_Tab_1[PredSpe == Species & PredSpe == "SVV", read]]
writeQualityScaledXStringSet(SVV_fastq, "/mnt/raid61/Personal_data/songjunwei/DRS_RTA/00Split/batch1/SVV.fastq.gz", compress = TRUE)

2021-08-25

ROC_Tab_2[PredSpe == Species, .N, Species]
fastq <- microseq::readFastq("/mnt/raid61/Personal_data/songjunwei/DRS_RTA/batch2_fq/DRS_virus2.fastq")
id <- fastq$Header
fastq <- QualityScaledDNAStringSet(x = RNAStringSet(fastq$Sequence), 
                                   quality = PhredQuality(fastq$Quality))
names(fastq) <- id
SVV_fastq <- fastq[mapply(function(x) x[1], strsplit(names(fastq), " ")) %in% ROC_Tab_2[PredSpe == Species & PredSpe == "SVV", read]]
writeQualityScaledXStringSet(SVV_fastq, "/mnt/raid61/Personal_data/songjunwei/DRS_RTA/00Split/batch2/SVV.fastq.gz", compress = TRUE)
PRRSV_fastq <- fastq[mapply(function(x) x[1], strsplit(names(fastq), " ")) %in% ROC_Tab_2[PredSpe == Species & PredSpe == "PRRSV", read]]
writeQualityScaledXStringSet(PRRSV_fastq, "/mnt/raid61/Personal_data/songjunwei/DRS_RTA/00Split/batch2/PRRSV.fastq.gz", compress = TRUE)
Pb_RTA_08_fastq <- fastq[mapply(function(x) x[1], strsplit(names(fastq), " ")) %in% ROC_Tab_2[PredSpe == Species & PredSpe == "PbergheiANKA" & pred == "RTA-08", read]]
writeQualityScaledXStringSet(Pb_RTA_08_fastq, "/mnt/raid61/Personal_data/songjunwei/DRS_RTA/00Split/batch2/Pb_RTA_08.fastq.gz", compress = TRUE)
Pb_RTA_27_fastq <- fastq[mapply(function(x) x[1], strsplit(names(fastq), " ")) %in% ROC_Tab_2[PredSpe == Species & PredSpe == "PbergheiANKA" & pred == "RTA-27", read]]
writeQualityScaledXStringSet(Pb_RTA_27_fastq, "/mnt/raid61/Personal_data/songjunwei/DRS_RTA/00Split/batch2/Pb_RTA_27.fastq.gz", compress = TRUE)

2021-10-08

ROC_Tab_3[PredSpe == Species, .N, Species]
fastq <- microseq::readFastq("/mnt/raid61/Personal_data/songjunwei/DRS_RTA/batch3_fq/DRS_multi3_6sample.fastq")
id <- fastq$Header
fastq <- QualityScaledDNAStringSet(x = RNAStringSet(fastq$Sequence), 
                                   quality = PhredQuality(fastq$Quality))
names(fastq) <- id
Pb_fastq <- fastq[mapply(function(x) x[1], strsplit(names(fastq), " ")) %in% ROC_Tab_3[PredSpe == Species & PredSpe == "PbergheiANKA", read]]
writeQualityScaledXStringSet(Pb_fastq, "/mnt/raid61/Personal_data/songjunwei/DRS_RTA/00Split/batch3/Pb.fastq.gz", compress = TRUE)
S_enter_fastq <- fastq[mapply(function(x) x[1], strsplit(names(fastq), " ")) %in% ROC_Tab_3[PredSpe == Species & PredSpe == "S_enter", read]]
writeQualityScaledXStringSet(S_enter_fastq, "/mnt/raid61/Personal_data/songjunwei/DRS_RTA/00Split/batch3/S_enter.fastq.gz", compress = TRUE)
S_cere_fastq <- fastq[mapply(function(x) x[1], strsplit(names(fastq), " ")) %in% ROC_Tab_3[PredSpe == Species & PredSpe == "S_cere", read]]
writeQualityScaledXStringSet(S_cere_fastq, "/mnt/raid61/Personal_data/songjunwei/DRS_RTA/00Split/batch3/S_cere.fastq.gz", compress = TRUE)
Ecoli_fastq <- fastq[mapply(function(x) x[1], strsplit(names(fastq), " ")) %in% ROC_Tab_3[PredSpe == Species & PredSpe == "Ecoli", read]]
writeQualityScaledXStringSet(Ecoli_fastq, "/mnt/raid61/Personal_data/songjunwei/DRS_RTA/00Split/batch3/Ecoli.fastq.gz", compress = TRUE)


tangchao7498/DecodeR documentation built on May 27, 2023, 7:21 p.m.